import numpy as np
import random
import pickle
# Load the LIDC annotation dictionary. The .npy file holds a pickled Python
# object (hence the .item() unwrap), so allow_pickle=True is required: NumPy
# has defaulted to allow_pickle=False since 1.16.3 and raises ValueError
# otherwise.
# NOTE: unpickling can execute arbitrary code -- only load trusted files here.
label_dict = np.load('/data_4t/Kaggle/backup/lidc/label_dict.npy',
                     allow_pickle=True).item()

# Iterate the keys in sorted order so nodule indices are stable across runs.
keys = sorted(label_dict)

# Flatten the per-key label lists into one list with a dict per nodule.
lidc_true_nodules = []
for key in keys:
    for label in label_dict[key]:
        # The running position in the output list doubles as the nodule id.
        index = len(lidc_true_nodules)
        record = {
            'scanid': key,
            'source': 'LIDC',
            'bbox': label['bbox'],
            'index': index,
            'path': str(index),
        }
        # Merge the annotation attributes last so they take precedence over
        # the bookkeeping fields above if a key happens to collide.
        record.update(label['attrs'])
        lidc_true_nodules.append(record)
# Randomly split the nodules into 5 folds of near-equal size.
# The fold has to come from each nodule's *position* in the shuffled order;
# deriving it from the nodule's own index (i % 5) would ignore the shuffle
# and always produce the same, non-random assignment.
# NOTE: the RNG is not seeded here, so the split differs from run to run.
nodule_index = list(range(len(lidc_true_nodules)))
random.shuffle(nodule_index)
for position, i in enumerate(nodule_index):
    fold = position % 5
    lidc_true_nodules[i]['fold'] = fold
# Persist the full nodule list (including the 'fold' field) as a pickle.
with open('/ssd_1t/wangd/kaggle_data/LIDC_true_nodules.pkl', 'wb') as pkl_file:
    pickle.dump(lidc_true_nodules, pkl_file)

# Echo every nodule's malignancy value on one space-separated line.
for nodule in lidc_true_nodules:
    print(nodule['malignancy'], end=' ')